import pandas as pd
import pickle

# English vocabulary state, filled in by get_datas():
#   en_word_2_index -- maps each distinct element to its integer id
#   en_index_2_word -- distinct elements in id order (list position == id)
#   en_index        -- next id to hand out
en_word_2_index, en_index_2_word, en_index = {}, [], 0

# Chinese vocabulary state, laid out the same way:
#   ch_word_2_index -- maps each distinct element to its integer id
#   ch_index_2_word -- distinct elements in id order (list position == id)
#   ch_index        -- next id to hand out
ch_word_2_index, ch_index_2_word, ch_index = {}, [], 0


def _extend_vocab(sentences, word_2_index, index_2_word, next_index):
    """Register every unseen element of every sentence; return the next free id."""
    for sentence in sentences:
        for token in sentence:
            # The dict and the list are always updated together, so testing
            # membership on the dict gives the same answer as scanning the
            # list, but in O(1) instead of O(vocabulary size).
            if token not in word_2_index:
                index_2_word.append(token)
                word_2_index[token] = next_index
                next_index += 1
    return next_index


def get_datas():
    """Build the English and Chinese vocabularies from ./datas/translate.csv.

    Reads the "english" and "chinese" columns and walks every element of
    every cell (every character, when the cell is a string).  Each element
    not seen before is appended to the matching ``*_index_2_word`` list and
    recorded in the matching ``*_word_2_index`` dict, with ids assigned in
    first-seen order starting from the current ``en_index`` / ``ch_index``
    counter.  The counters are updated to the next free id.

    Missing cells (which pandas reads as NaN) are skipped, because they
    contribute no elements and are not iterable.

    Returns:
        None.  Results are stored in the module-level tables and counters.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        KeyError: if either expected column is absent.
    """
    global en_index, ch_index
    received_datas = pd.read_csv('./datas/translate.csv')

    # dropna() removes empty cells up front; iterating a float NaN would
    # otherwise raise TypeError in the inner loop.
    en_datas = received_datas["english"].dropna()
    ch_datas = received_datas["chinese"].dropna()

    en_index = _extend_vocab(en_datas, en_word_2_index, en_index_2_word, en_index)
    ch_index = _extend_vocab(ch_datas, ch_word_2_index, ch_index_2_word, ch_index)


# Populate the module-level vocabularies before serialising them.
get_datas()

# Each .vec file stores two back-to-back pickles: the word->index dict
# first, then the index->word list.
with open('./datas/ch1.vec', 'wb') as f:
    pickle.dump(ch_word_2_index, f, protocol=pickle.HIGHEST_PROTOCOL)
    pickle.dump(ch_index_2_word, f, protocol=pickle.HIGHEST_PROTOCOL)

with open('./datas/en1.vec', 'wb') as f:
    pickle.dump(en_word_2_index, f, protocol=pickle.HIGHEST_PROTOCOL)
    pickle.dump(en_index_2_word, f, protocol=pickle.HIGHEST_PROTOCOL)

# Read both files back in the order they were written and print the
# index->word list from each as a round-trip check.
with open('./datas/ch1.vec', 'rb') as f:
    tmp1, tmp2 = pickle.load(f), pickle.load(f)
print(tmp2)

with open('./datas/en1.vec', 'rb') as f:
    tmp3, tmp4 = pickle.load(f), pickle.load(f)
print(tmp4)

